# Load the project data and recode its categorical columns as numbers.
# Blank cells, "NA" and "." are read as missing. Ordered categories are mapped
# to their 1-based position in the listed level order; any value that is not
# listed (including a missing one) becomes NA. Two-level columns become 0/1
# indicators.
project_2_numdata <-
  read_csv("./data/project_2_data.csv", na = c("NA", "", ".")) |>
  janitor::clean_names() |>
  mutate(
    # Event indicator: 1 when the status is "Dead", 0 otherwise.
    status = as.numeric(status == "Dead"),
    t_stage = as.numeric(match(t_stage, c("T1", "T2", "T3", "T4"))),
    n_stage = as.numeric(match(n_stage, c("N1", "N2", "N3"))),
    # Stored under a new name, so the original x6th_stage text column is kept.
    x6th_stage_num = as.numeric(
      match(x6th_stage, c("IIA", "IIB", "IIIA", "IIIB", "IIIC"))
    ),
    differentiate = as.numeric(
      match(
        differentiate,
        c(
          "Well differentiated",
          "Moderately differentiated",
          "Poorly differentiated",
          "Undifferentiated"
        )
      )
    ),
    grade = as.numeric(match(grade, c("1", "2", "3", "anaplastic; Grade IV"))),
    # 1 when a_stage is "Regional", 0 otherwise; a_stage itself is dropped
    # at the end of the pipeline.
    a_stage_regional = as.numeric(a_stage == "Regional"),
    estrogen_status = as.numeric(estrogen_status == "Positive"),
    progesterone_status = as.numeric(progesterone_status == "Positive")
  ) |>
  select(-a_stage)
## Rows: 4024 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Race, Marital Status, T Stage, N Stage, 6th Stage, differentiate, ...
## dbl  (5): Age, Tumor Size, Regional Node Examined, Reginol Node Positive, Su...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Survival response built from the follow-up time and event columns.
# The loop below does not use it: each formula spells out its own Surv() term.
surv_object <- with(
  project_2_numdata,
  Surv(time = survival_months, event = status)
)

# Every column except the two outcome columns is screened as a predictor.
variables <- setdiff(names(project_2_numdata), c("survival_months", "status"))

# Fit and print one single-predictor Cox model per candidate column.
for (predictor in variables) {
  # The object is deliberately named `formula`: summary() echoes the call as
  # written, so the name appears in the printed "Call:" line.
  formula <- as.formula(paste("Surv(survival_months, status) ~", predictor))
  model <- coxph(formula, data = project_2_numdata)
  print(predictor)
  print(summary(model))
}
## [1] "age"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##         coef exp(coef) se(coef)     z Pr(>|z|)    
## age 0.015659  1.015783 0.004624 3.386 0.000708 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##     exp(coef) exp(-coef) lower .95 upper .95
## age     1.016     0.9845     1.007     1.025
## 
## Concordance= 0.545  (se = 0.013 )
## Likelihood ratio test= 11.62  on 1 df,   p=7e-04
## Wald test            = 11.47  on 1 df,   p=7e-04
## Score (logrank) test = 11.5  on 1 df,   p=7e-04
## 
## [1] "race"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##              coef exp(coef) se(coef)      z Pr(>|z|)    
## raceOther -0.9978    0.3687   0.2098 -4.756 1.97e-06 ***
## raceWhite -0.5997    0.5490   0.1252 -4.791 1.66e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## raceOther    0.3687      2.712    0.2444    0.5562
## raceWhite    0.5490      1.822    0.4296    0.7016
## 
## Concordance= 0.541  (se = 0.008 )
## Likelihood ratio test= 27.23  on 2 df,   p=1e-06
## Wald test            = 29.95  on 2 df,   p=3e-07
## Score (logrank) test = 31  on 2 df,   p=2e-07
## 
## [1] "marital_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##                             coef exp(coef) se(coef)      z Pr(>|z|)   
## marital_statusMarried   -0.33465   0.71559  0.11792 -2.838  0.00454 **
## marital_statusSeparated  0.74830   2.11341  0.27896  2.682  0.00731 **
## marital_statusSingle    -0.08987   0.91405  0.14397 -0.624  0.53249   
## marital_statusWidowed    0.13768   1.14761  0.17754  0.776  0.43804   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                         exp(coef) exp(-coef) lower .95 upper .95
## marital_statusMarried      0.7156     1.3975    0.5679    0.9017
## marital_statusSeparated    2.1134     0.4732    1.2233    3.6512
## marital_statusSingle       0.9141     1.0940    0.6893    1.2120
## marital_statusWidowed      1.1476     0.8714    0.8104    1.6252
## 
## Concordance= 0.549  (se = 0.011 )
## Likelihood ratio test= 26.45  on 4 df,   p=3e-05
## Wald test            = 30.18  on 4 df,   p=4e-06
## Score (logrank) test = 31.6  on 4 df,   p=2e-06
## 
## [1] "t_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##            coef exp(coef) se(coef)    z Pr(>|z|)    
## t_stage 0.46842   1.59747  0.04741 9.88   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## t_stage     1.597      0.626     1.456     1.753
## 
## Concordance= 0.6  (se = 0.011 )
## Likelihood ratio test= 91.12  on 1 df,   p=<2e-16
## Wald test            = 97.61  on 1 df,   p=<2e-16
## Score (logrank) test = 99.3  on 1 df,   p=<2e-16
## 
## [1] "n_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##            coef exp(coef) se(coef)     z Pr(>|z|)    
## n_stage 0.76586   2.15085  0.04779 16.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## n_stage     2.151     0.4649     1.959     2.362
## 
## Concordance= 0.651  (se = 0.011 )
## Likelihood ratio test= 230.8  on 1 df,   p=<2e-16
## Wald test            = 256.9  on 1 df,   p=<2e-16
## Score (logrank) test = 286.2  on 1 df,   p=<2e-16
## 
## [1] "x6th_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##                  coef exp(coef) se(coef)      z Pr(>|z|)    
## x6th_stageIIB  0.5223    1.6858   0.1335  3.912 9.17e-05 ***
## x6th_stageIIIA 0.9398    2.5594   0.1259  7.462 8.50e-14 ***
## x6th_stageIIIB 1.4959    4.4634   0.2458  6.085 1.16e-09 ***
## x6th_stageIIIC 1.8480    6.3474   0.1263 14.627  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## x6th_stageIIB      1.686     0.5932     1.298     2.190
## x6th_stageIIIA     2.559     0.3907     2.000     3.276
## x6th_stageIIIB     4.463     0.2240     2.757     7.226
## x6th_stageIIIC     6.347     0.1575     4.955     8.131
## 
## Concordance= 0.669  (se = 0.011 )
## Likelihood ratio test= 248.2  on 4 df,   p=<2e-16
## Wald test            = 262.1  on 4 df,   p=<2e-16
## Score (logrank) test = 310.5  on 4 df,   p=<2e-16
## 
## [1] "differentiate"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##                  coef exp(coef) se(coef)     z Pr(>|z|)    
## differentiate 0.65269   1.92070  0.06485 10.06   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## differentiate     1.921     0.5206     1.691     2.181
## 
## Concordance= 0.609  (se = 0.011 )
## Likelihood ratio test= 103.2  on 1 df,   p=<2e-16
## Wald test            = 101.3  on 1 df,   p=<2e-16
## Score (logrank) test = 102.5  on 1 df,   p=<2e-16
## 
## [1] "grade"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##          coef exp(coef) se(coef)     z Pr(>|z|)    
## grade 0.65269   1.92070  0.06485 10.06   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##       exp(coef) exp(-coef) lower .95 upper .95
## grade     1.921     0.5206     1.691     2.181
## 
## Concordance= 0.609  (se = 0.011 )
## Likelihood ratio test= 103.2  on 1 df,   p=<2e-16
## Wald test            = 101.3  on 1 df,   p=<2e-16
## Score (logrank) test = 102.5  on 1 df,   p=<2e-16
## 
## [1] "tumor_size"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##                coef exp(coef) se(coef)     z Pr(>|z|)    
## tumor_size 0.013357  1.013446 0.001521 8.781   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##            exp(coef) exp(-coef) lower .95 upper .95
## tumor_size     1.013     0.9867      1.01     1.016
## 
## Concordance= 0.609  (se = 0.012 )
## Likelihood ratio test= 65.65  on 1 df,   p=5e-16
## Wald test            = 77.1  on 1 df,   p=<2e-16
## Score (logrank) test = 78.53  on 1 df,   p=<2e-16
## 
## [1] "estrogen_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##                    coef exp(coef) se(coef)      z Pr(>|z|)    
## estrogen_status -1.2994    0.2727   0.1060 -12.25   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## estrogen_status    0.2727      3.667    0.2215    0.3357
## 
## Concordance= 0.572  (se = 0.008 )
## Likelihood ratio test= 112.5  on 1 df,   p=<2e-16
## Wald test            = 150.1  on 1 df,   p=<2e-16
## Score (logrank) test = 172.5  on 1 df,   p=<2e-16
## 
## [1] "progesterone_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##                         coef exp(coef) se(coef)      z Pr(>|z|)    
## progesterone_status -0.95638   0.38428  0.08565 -11.17   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                     exp(coef) exp(-coef) lower .95 upper .95
## progesterone_status    0.3843      2.602    0.3249    0.4545
## 
## Concordance= 0.602  (se = 0.01 )
## Likelihood ratio test= 109.2  on 1 df,   p=<2e-16
## Wald test            = 124.7  on 1 df,   p=<2e-16
## Score (logrank) test = 134.5  on 1 df,   p=<2e-16
## 
## [1] "regional_node_examined"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##                            coef exp(coef) se(coef)     z Pr(>|z|)  
## regional_node_examined 0.011017  1.011078 0.004842 2.275   0.0229 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                        exp(coef) exp(-coef) lower .95 upper .95
## regional_node_examined     1.011      0.989     1.002     1.021
## 
## Concordance= 0.524  (se = 0.012 )
## Likelihood ratio test= 5.05  on 1 df,   p=0.02
## Wald test            = 5.18  on 1 df,   p=0.02
## Score (logrank) test = 5.17  on 1 df,   p=0.02
## 
## [1] "reginol_node_positive"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##                           coef exp(coef) se(coef)     z Pr(>|z|)    
## reginol_node_positive 0.077766  1.080869 0.004682 16.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       exp(coef) exp(-coef) lower .95 upper .95
## reginol_node_positive     1.081     0.9252     1.071     1.091
## 
## Concordance= 0.659  (se = 0.012 )
## Likelihood ratio test= 191.2  on 1 df,   p=<2e-16
## Wald test            = 275.9  on 1 df,   p=<2e-16
## Score (logrank) test = 300.4  on 1 df,   p=<2e-16
## 
## [1] "x6th_stage_num"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##                   coef exp(coef) se(coef)     z Pr(>|z|)    
## x6th_stage_num 0.45643   1.57842  0.02806 16.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## x6th_stage_num     1.578     0.6335     1.494     1.668
## 
## Concordance= 0.669  (se = 0.011 )
## Likelihood ratio test= 247.8  on 1 df,   p=<2e-16
## Wald test            = 264.6  on 1 df,   p=<2e-16
## Score (logrank) test = 286.1  on 1 df,   p=<2e-16
## 
## [1] "a_stage_regional"
## Call:
## coxph(formula = formula, data = project_2_numdata)
## 
##   n= 4024, number of events= 616 
## 
##                     coef exp(coef) se(coef)      z Pr(>|z|)    
## a_stage_regional -1.1399    0.3199   0.1741 -6.547 5.86e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## a_stage_regional    0.3199      3.126    0.2274    0.4499
## 
## Concordance= 0.522  (se = 0.005 )
## Likelihood ratio test= 31.23  on 1 df,   p=2e-08
## Wald test            = 42.87  on 1 df,   p=6e-11
## Score (logrank) test = 47.71  on 1 df,   p=5e-12

Based on the univariate Cox model results, the final model should include statistically significant and clinically relevant variables. These include age (p = 0.0007, HR = 1.016), race (p < 0.001, indicating disparities), marital status (significant for Married and Separated), t_stage (p < 2e-16, HR = 1.597), n_stage (p < 2e-16, HR = 2.151), differentiate (p < 2e-16, HR = 1.921), grade (p < 2e-16, HR = 1.921), x6th_stage_num (p < 2e-16, HR = 1.578), tumor size (p < 2e-16, HR = 1.013), estrogen status (p < 2e-16, HR = 0.273), progesterone status (p < 2e-16, HR = 0.384), regional node positive (p < 2e-16, HR = 1.081), and a_stage (regional) (p < 2e-08, HR = 0.32). Variables like the Single and Widowed marital status levels (p > 0.05) should be excluded. However, we still need to check for multicollinearity and other issues before making the final decision.

# Pairwise correlations between all numeric columns of the recoded data.
# vapply() guarantees one logical flag per column for the column selection.
cor_matrix <- cor(
  project_2_numdata[, vapply(project_2_numdata, is.numeric, logical(1))]
)
print(cor_matrix)
##                                 age     t_stage      n_stage differentiate
## age                     1.000000000 -0.06691424  0.002883209   -0.09929371
## t_stage                -0.066914236  1.00000000  0.277014535    0.13147030
## n_stage                 0.002883209  0.27701454  1.000000000    0.16250134
## differentiate          -0.099293714  0.13147030  0.162501337    1.00000000
## grade                  -0.099293714  0.13147030  0.162501337    1.00000000
## tumor_size             -0.077214971  0.80917552  0.277904717    0.11936737
## estrogen_status         0.059787319 -0.06095383 -0.101994729   -0.21125105
## progesterone_status    -0.021268794 -0.05763326 -0.093722800   -0.17986002
## regional_node_examined -0.033345483  0.11410205  0.328276140    0.08441631
## reginol_node_positive   0.012585513  0.24307493  0.838073333    0.13531890
## survival_months        -0.009389560 -0.08571763 -0.139576527   -0.06766924
## status                  0.055921310  0.15469948  0.255771945    0.16139784
## x6th_stage_num         -0.018741051  0.60671104  0.881880081    0.18694337
## a_stage_regional        0.020859878 -0.22112353 -0.260572948   -0.03945904
##                              grade  tumor_size estrogen_status
## age                    -0.09929371 -0.07721497      0.05978732
## t_stage                 0.13147030  0.80917552     -0.06095383
## n_stage                 0.16250134  0.27790472     -0.10199473
## differentiate           1.00000000  0.11936737     -0.21125105
## grade                   1.00000000  0.11936737     -0.21125105
## tumor_size              0.11936737  1.00000000     -0.05958454
## estrogen_status        -0.21125105 -0.05958454      1.00000000
## progesterone_status    -0.17986002 -0.06987929      0.51330798
## regional_node_examined  0.08441631  0.10435180     -0.04483576
## reginol_node_positive   0.13531890  0.24232172     -0.08598523
## survival_months        -0.06766924 -0.08690124      0.12846902
## status                  0.16139784  0.13420512     -0.18465036
## x6th_stage_num          0.18694337  0.51397121     -0.10561449
## a_stage_regional       -0.03945904 -0.12388287      0.06557043
##                        progesterone_status regional_node_examined
## age                            -0.02126879            -0.03334548
## t_stage                        -0.05763326             0.11410205
## n_stage                        -0.09372280             0.32827614
## differentiate                  -0.17986002             0.08441631
## grade                          -0.17986002             0.08441631
## tumor_size                     -0.06987929             0.10435180
## estrogen_status                 0.51330798            -0.04483576
## progesterone_status             1.00000000            -0.01805070
## regional_node_examined         -0.01805070             1.00000000
## reginol_node_positive          -0.07806852             0.41157970
## survival_months                 0.09601832            -0.02205421
## status                         -0.17707930             0.03477200
## x6th_stage_num                 -0.10124555             0.31721872
## a_stage_regional                0.02652963            -0.06901029
##                        reginol_node_positive survival_months      status
## age                               0.01258551     -0.00938956  0.05592131
## t_stage                           0.24307493     -0.08571763  0.15469948
## n_stage                           0.83807333     -0.13957653  0.25577194
## differentiate                     0.13531890     -0.06766924  0.16139784
## grade                             0.13531890     -0.06766924  0.16139784
## tumor_size                        0.24232172     -0.08690124  0.13420512
## estrogen_status                  -0.08598523      0.12846902 -0.18465036
## progesterone_status              -0.07806852      0.09601832 -0.17707930
## regional_node_examined            0.41157970     -0.02205421  0.03477200
## reginol_node_positive             1.00000000     -0.13521385  0.25663809
## survival_months                  -0.13521385      1.00000000 -0.47651426
## status                            0.25663809     -0.47651426  1.00000000
## x6th_stage_num                    0.77396309     -0.14483728  0.25763588
## a_stage_regional                 -0.23284888      0.07010906 -0.09658422
##                        x6th_stage_num a_stage_regional
## age                       -0.01874105       0.02085988
## t_stage                    0.60671104      -0.22112353
## n_stage                    0.88188008      -0.26057295
## differentiate              0.18694337      -0.03945904
## grade                      0.18694337      -0.03945904
## tumor_size                 0.51397121      -0.12388287
## estrogen_status           -0.10561449       0.06557043
## progesterone_status       -0.10124555       0.02652963
## regional_node_examined     0.31721872      -0.06901029
## reginol_node_positive      0.77396309      -0.23284888
## survival_months           -0.14483728       0.07010906
## status                     0.25763588      -0.09658422
## x6th_stage_num             1.00000000      -0.29196180
## a_stage_regional          -0.29196180       1.00000000
# Table used for the correlation chart: the recoded data without the race,
# marital_status and x6th_stage columns.
cortable <- select(project_2_numdata, -c(race, marital_status, x6th_stage))
# Display the regional_node_examined column in full.
cortable[["regional_node_examined"]]
##    [1] 24 14 14  2  3 18 11  9 20 21  9 11 13 23 16 20  1 22 16 20 15 15  4 18
##   [25] 26 31 25 14 14 10  3  5  6 19 19  9 21 14 10 15 12 12  2 15 11 10 15 12
##   [49] 16  8 17  9  7 20 16 13 19 49 15 17 24 12  1 20 16  5 23 20  5 12 12 24
##   [73] 16 15 19 13 11  7  9  9 21 10  9  2 13 12  7 10 11  9  8 15  5 21 12 17
##   [97]  4 10  3 14 33 25  3  7 14  2 11 12  8 30 15  3 34 14  6 12  9 16 15 15
##  [121]  7 15 16 23 23 16 13  3  8 13 14 10  8  5  8 28 12 20 14 18  4 13 24 23
##  [145] 20 15 18 13  1 13 18 14 15 14 14 16 15  6  2 14 18  3  2 20 16  3 12 24
##  [169]  8 14 20 11  9  9 24 11 17  7 31 11 32 23  3 15 14 14  3 14 34 14 12  2
##  [193] 19  5 16  5 16 10 22 11 14 16  1 17  2  6 24 18 12  6 11 17 18  9 10  4
##  [217] 12 17  4 27 14 20 18  9  2 27 20 42  9 15 19  7 22  2 15 26 26 12 28  8
##  [241] 29 30  9 18 15 18  9 18 15 11  6  5  2 13 19 20 13  7 20 12 11  9  9  3
##  [265] 33  3 33  3  4 20 13 20 18  9 11  2  4 23 14 14 16 12 18 12 14  7 13 12
##  [289] 30 41 39 17 11 19 14 10  1 11 21 13 18 23 30 46 25 10 22 10  4 19  4 15
##  [313] 14 27 12  4 14 14 28 12 18 15  8 19 16 15 14  1  9 10 24 19  6 19 13 11
##  [337]  4 13 19 13 12 19 17  9 23 12  5 16 29 17 24 16  6  7  1 18 23  5  3 20
##  [361] 10 23 24  2 11 16 19 25 14 12 30 16  2 24 26 14 14 16 18 15  5  6 17 11
##  [385] 22  3 16 10 14  7 20 14 21 18 40 20 14  6 18  8 31 13 14  7 13 20 12  9
##  [409]  1  9  7 19  3  9  9  2  7 20 39  7 27 17 10  8  8 14 20 11 10 10 11 25
##  [433]  1  9 15 19  9  2 25  5 21 17 10 29 20  3 26  6 11 14  2  2 17  9 14 19
##  [457] 20 16 13 10  6 18 18 11  7 11  3  4 12 12  9  3  4 16  7 32  9 28  9 17
##  [481] 18  6 25 16  1 23  9 18 17 18 51 15  7 10 18 23 23 16 19 20 24 15  5  4
##  [505] 22 12  9 15 12 14 17  9 13 13 15 13 28 14 13 13 11 33 29 21  1 17 44  2
##  [529] 16  3 38 14 18 16 17 24 18  7 21 23 23 13  3  9 33  5 15 16 22 20 47 15
##  [553] 13 17 14  1 22  9 14 15 11 19  1  2 17 14  9 27 18 24 16 25  9 26 31 14
##  [577] 16 14 14  5 13 14 19 11 29 19 19 11 10 12 29  4 15 19 13 10  4 13 15 18
##  [601]  3 16 40 27 22  7 16 15 10 15  6 17  8  8 13  8 10  2  5 11  6 23  1 15
##  [625] 24  8 17  4 15 13 26  2 18 54 28  2  6  1  2  2 13  5 14 27 28 11 18 17
##  [649] 12 31  2 24  3  7 26  3 28 24 11 21 14  7 39 19  1  8 16 19 28 12 19  4
##  [673] 19 12 23 17 17 13 10 13 14 22 11  9 18 32 12 13  8 19 16 12 32 20 14  8
##  [697] 18 27 26 19 12 36 19 12 27  5 21  2 15  2 16 26 19  2 14  2 11  4  8  4
##  [721] 14 20 20 14 27 29 24  5  7 22 17  8 22 16 23 10 12 16 26 15 14 11  9  1
##  [745]  8  9 21 26 25  2 13 10 23 10  6 33 10 19  5 13 13 31  3  6  2 24 19 17
##  [769]  2 13 11 19 17 12  7 18 15 20 16 10  2  6 10 30  9 11 18  3  2 15 38  2
##  [793]  9 15  2 11 14  1 25 18 15 27 19  6 17  1  6 14 12  9 14 13  5 20 13 29
##  [817] 21  2  1  9  2  1 20 19 26 24 14  6 13 17  5  1 14 26  3 23  5  7 36 15
##  [841] 19 31 14 16 12 19 20 10 12 17  5  9 19  3  4 25  6 32 15 16 31 16  7 21
##  [865] 18 15 20 10 31 13  3  3 17  6 21 17 16 20 10 16 25 15  6 17  6 18 25 10
##  [889] 10  7 28 10 25 38 26 20  9 14 32 25  4 12  3 11  7  9 17 24  9 29  2 32
##  [913] 13 18  4 18  7 19  3 13 19 19 27 18  8 26 10  5  1 23 10 11 16 12 15 13
##  [937]  3 14 21 14 23 61  7  1 23 26 10 11 14 15 16 10 10  8  5 34 16 11 22 23
##  [961]  2 15 11 32 21 12 14 19 10 14  9 17  9 20  5 10 32  2  9 18  5 19  9 13
##  [985] 17  3  2 26  9 41 17 18 10  8 11 26 21  9 20 22  3 19 22  8 23 13  5 21
## [1009] 17 16 15 20 24 10 17  3 23 16 17 17 18 17  6 17 16 15 20  4  6 18 15 29
## [1033] 18 18  2  6 18 13  6 30 23 25  9 19 10 17  9 10 13  8 14 15 17 23  7 16
## [1057] 10 10 24  7 19 25 19 19 27 19 18  9 16 22 12 16 23 29  5 13 26  4  5  9
## [1081] 30 26 12 15  4 12  1 28 15 22 33 28 23 23 18 22  9  6 10 15 11 15 21 10
## [1105]  1 18 14 22 11 20 16 10 12 11  8  2 26 10 16 16 30  2 11 11 11  6  4  4
## [1129] 37  2 17  4 23  2 10 11 10 14 10 21 12 13 17  9 16 13 22 11  1 18  8 11
## [1153] 14 14 18 10  4  9 12 15 17 11 10 21 16 13 23  8  8 20  7 12  3  6 13 13
## [1177] 22  4  5 23  4 18 21 13  7 14 12 20  3 12  9 22 21  7  2  1 15 13  5 28
## [1201] 15 12 19  9  8 16  9 17  9 14 18 28 18 12  2  9 15 16 26 15  8 18 15 10
## [1225]  3 15 13  3  6  8  7 19 13 25  5  9  5 16  4 14 11 14 14  3 16 18 26 10
## [1249] 20 10  5  3  4 11 15 27  2 19 22  5 20 20  4  3 16 21 17 36  5 15  8  9
## [1273] 13 15  2 10 10  8 26 21 14 14 18 10  4 10 12 32 13 26  6 13 13 12  6 11
## [1297] 20 21 18 28 14  3 22  2 13 21 22 16 13 16  5  1  1  4 32 16 11 13 24 28
## [1321] 30 16 16  3  8  2  6  9  2 16 10 13  1 23 26 21  4 13 13 14 30  5  3 18
## [1345] 14 15 21 14  7 14 11 20 16 17 11 20 13  4  5 13 16  8 14  4  9  6 21 13
## [1369] 15  4 14 15 18 24  4 10 20 16 19 21 13 15 29  8 15 15 24 16 21 28  9 14
## [1393] 10  3 10  1 25 34  1  9 23  4  1 13  9  6 15 14  8 18 11 27  7 13  9 14
## [1417] 13 15 15 14 17 16 14  3 13 13 28 20  5 28 23  3 12 23 14 17 14 24 27 21
## [1441] 16 19 11 23  3  3 10  8 14 11  9 13 25 13 12 13  2 13 14 36 21 24 35  4
## [1465]  7  3 11 27 13 29  6 17 11 11 31 13 12 12  3 16 17 19  5 12  9 15 23  9
## [1489] 23  2  5 12 17 17 12 16 13 11 25 20 15  9 11 26 13 20 12 13 21 20  9 13
## [1513]  9  4  3 16 27 14 15 16 40 17 13 22 19  1 32  3  2 10 15 22  1 20 16  2
## [1537] 21 11 16 31 15 14 14 16 16 10 15 12 19  7  5  4 10 25 16  4 20 17  6 11
## [1561] 47 24 15  9 17 12 16 29  4 11 13  4 16  9 17 18 28  9 11  9 43  9  7  9
## [1585] 32  9 20  5 11 24  6 39 18  8 16  8 14 26 18 25 13 17  8 13 22 10 10  9
## [1609] 18 21 22 20  8 24 18 11  7 19 20 23 25  9  9 19 49  9  7  7 18 11  6 16
## [1633] 17  4 17 11  4 25 25 27 19 18 21 16 12 26 15 16 12 18 23 27 18  8  8  9
## [1657] 16 17 18  4 17 20 20 13  4 24  6 29 11  1 18  6 25 18  7 12 14  6 13 13
## [1681] 20  5 14 15 21 13 13  3 19 12 19 10  5 15 16 20 13  7 13  7 20 26  9  6
## [1705] 21 10  6 22 16 21  3 25 16 19 11 19 10  6 24 13 20 18  2 27 15 11 11  8
## [1729] 24  8 10 21  6 11 23  1  5  2 27 27  1 21 11 16 20  8 19  9 15 24 14 26
## [1753] 16 18 17 14  9 20 15  2 11 14 10 12 27 15 16  1 15  4 15 23 16  6  9 14
## [1777]  9 13  3 16  9 16 10 16 26 11 16 10 29 21 25  9 12 24  2 17 10 19  5 13
## [1801]  6 13 13  6  8 11  3 29 11  1  7  1 22  4 34  7  3  5 17 15 18 15  6 10
## [1825]  9 20  9 11  9 18  1 14  6 22  8 15 15  8 11 23 14 28  6  8 11  8 19 20
## [1849]  3 10 16  4  9 24  4 12 10  6 12 13 25 18 12 16 14  4 15 13 13 20 11  8
## [1873] 35 18 15 12  1 16 12 10 23  8 16 24 14 24 10 24 17  9 30 13 23 15 20  1
## [1897]  8 19 16 27 15  9  9 25  2  9  7 18 19 13  8  9  6 14 15 15 19 16 23  2
## [1921]  3 17  1 20  5  1 27 15 23  3 13 10 14 15 18 25 21 14 11 17 13 10 13 15
## [1945] 18 13 12 11 20 22 11 13  8 17 12 15 11  2 11 10 18 14 19 21 14  3  3 13
## [1969] 17  6 24 14 24 17 17 12 15  4  9 29 18  5  3 18 19 21 12 10 17  7 18  4
## [1993] 18  7 17  8 22  2  4 16 30  7 13 16  1  7 14  5 25  9 12 14 15  1 22 17
## [2017]  7  6 24  5 13 17 15  7 21 25 17 16 37 13  8 33 24 28 24 13 22 33 21 11
## [2041] 14 12 19 13 26 19  3 12 14 15 15 12  4  3  9 12 41 16 22  8 13 17  3 19
## [2065]  1 21  2 16 12  6  4 11 13 26 23 11 10 22 14  8  4  3 17  2 23 17  3 13
## [2089] 19 14 23  8 13 24 12 11 11 17  2 21 16 15  9  2 17 14 20 12 16 27 13 29
## [2113] 10 24 19 14 14  9 19 19  7 20 16 13 16 11 19  5 36 11 20  5  5 28 52  7
## [2137] 23 15  9 11 13 15 41 15  1 23  8 23 26 12 20 12  3 16  2 23 16 11 11 37
## [2161] 14 15 15 34  8 21  7 27 22  3 21  1 10 13  7 11 11 10 23  2 24 14 41 17
## [2185]  9 12  1 28  9 13  8 22  3 23 18  2 12 15 16 18 19 17 10  2  5  2 10  8
## [2209] 14  2 19 17 12 35  7 19 21  2 19 13  4  1 16 13  2  1  3 22 17 23 25  9
## [2233] 10 11 20 24 14 15 15  7 18 16 13 45 17 16 12  9 36 18  3 20 14 18 22 31
## [2257] 17 12 11 12 16  8  1  1 11 19 18  4 15  1 10 22 26 11 29 18 13  4 18 18
## [2281]  9 21 13  8 13 18 21 26 10  2 12 13 12 35  4 23 12  1 10 12  8 32  6 15
## [2305] 29 20  6 17 18  9 15  2  6 16 18 10 13 14 16 14 10 14 20 35 12 12 13 14
## [2329]  2 21 32  8  3 10 23 15 14 15 16 12  7  4 16 14 17  5 16 13 12 14 31  8
## [2353] 27 15 32 30  9 21 18 24 15 18 18 22 16  9 20 16 20  4  5  6 28  1  2  9
## [2377] 35 23 16 16 10 15 16 10  6 17 14  8 17 18 12 28 18 23 32 17  1 20 17 13
## [2401]  7  9 10  5 16  9 11 30  9 19  9  5 14  8 21  8 18 16 13  1  3  9 13 12
## [2425] 21 30 17 11  3  7 12 11 11  2 10  8  8  7 28 27 24  4 22 15  2  9 14 28
## [2449] 13  2 13 10 11 21 19 30  8 20  2 14 43  8 57 14  5 16 16 12 13 21  9  6
## [2473] 17  5 19  9  8 14 12 16  5 28 19 14 25 18 14 22 13 22 19 16 12  9 15 30
## [2497] 13 10  8 21 12  3  3  8 11 15 11 26 20 10 25  5 19 19 13 15  8 10  9 15
## [2521] 10  4 17  8  4 26 11 16 25 13 11 14 13 16  9 13  8 13 13  6 13  5  7  2
## [2545] 21 23 26  1 13  6 16 18 20 17 19  1 25 12 25 10 17 10  4 17 18 25 17  9
## [2569] 28 13  2 17 18 24  8 21 12 15 24  5  9 14 17 26  6 17 12 15 12  8 14 16
## [2593]  7 10  5 16 13  1 14  9 10 22 12 37  9 12  2 36 11 19 12 12 12 28 18  8
## [2617] 15 23 13 26  5  1 16 11 21 13 17 15 18  8 17 18 23 17 17  2 19 10 30  8
## [2641] 15 17 10 11 12 18 20 17 13 22 13  5  7 14 13  7  4  4 12 11 19 11  2  9
## [2665] 12 26 11  1 15 13 17 12  9 20 13 14  5  3 13  4 18 27  4  5 14  2 18  6
## [2689] 16  3  3  4 19 10 18 24 13  3  7  9 20  4  6 32  7 21  6 14  5  2 14  8
## [2713] 12 14 19 13 21 11  4 21 11 21 32 12  2 43 13 14  5 16 13 10 15 14 19  8
## [2737]  8 21 12 16 18 11  5 17 18 19  6  6 13 15  6 12 17 39 12 26 24 17 15  9
## [2761] 26 16  6 25  8 11 26 25 10 13 16 19 20 17 22 20 11 11 18 15 23  9 22  7
## [2785]  2  3  5 16 10 23 33 24 10 12 16 10  2 16 18  6 29 23 11 13  7 12 16 23
## [2809]  5  8 10 24 19  1 14  9  5 14  4 22 22 12 17  2 22  9  8 17 26 22 23  9
## [2833]  1 21 12 26  8 19 15  4 14 11 27 16  9 26 13  2 15  9 26 15 10 19 10  2
## [2857] 10 16 13 11 23 11  7 19 21  7 20 12  8  1 11  9  8 32 24  9 17  8 13 12
## [2881] 15 12 24 12 12 10  9 47 17  4  9 23  8 10 22 14 27  6 12 19 16 14 21 16
## [2905] 18 14 16  3 17 20 22 12  9 14  9 14 13 22 22 14 17  3 15  6 13  2  2 14
## [2929] 30 22 10  2 15  9 35 11 14  1  6 19 17 11 24 33  4 10 14 10 10 27 13  7
## [2953] 30 14 14 19  3 14 18  8 21 27 17 12  6  8 30 28 18  8 16 13 20 13  6 36
## [2977] 12 13  5 17  2  8 23 10 12 13 29 10 14  3  8  5 24 29 16 32 19 17 29 12
## [3001] 20  7  1 18  8 13  8 17 24 19 12 12  9 10 16 14 11 34  2 13 14 17 20 18
## [3025] 24  5 27  1 13 15 29 16 25 14  2  9  4 10 12  8  9 18  6 37 33 13  9 15
## [3049] 24 11  9 11  9 21 19 12  3 12 24  3  9 12 19 15 35  8 14 10 12  4 19  9
## [3073] 22 16  7 11  4 17  5 13  8 16 23 15  9  8  8 10 15 28 21  6 17  5  3 17
## [3097] 17 35 14 21 11 12 27 18 14 19 12 11 16 12 18 17 18 24 15  8 18  1  8 25
## [3121] 19 26  8 18 22 13 15  9 29  4 17  9 47 25 15 11 13 19 22 23 14 16 27 29
## [3145]  7  8  1  1 25 40 19 28 23 10 12 17 23 14 41 13 13 16 16 19  5 24 13  4
## [3169]  2  9 10  8 24  7 14  6  6 24  5 10 14 20 13 26 21 13 14 11  9 11 15 12
## [3193]  5 18 30  4 17 10 13  1 19 24  4 26 17  8 19 20 16 13 16 25 19 16  3 12
## [3217] 15 14 37  2  9 18 11  1 27 15  1 16  1 26 22 16 10 18 12 11  4 21  7  9
## [3241]  6 21 12 21 17 22  5 14  5  2 17 18  2 11 18 11 11  6 10 13 17 15  4 15
## [3265] 26 33 19 29 18 10  5 11  9 22 15 14 13  2  8  9 15 16  2 12 15  5 16  8
## [3289] 47 10 13 15  9  5  8 15 13 13 27 13 20 13 20 21  4 25  8  6  2 20 15 11
## [3313] 15 21  1 20 16 21  2 16  5 10 20 12 19 15 35 23  3  9 21 19 21 17  5 24
## [3337] 14 27 14 13 19  8  3 26  2  2 18  3 18 21 18  7 18 20 16 22 11 21 10 13
## [3361]  9 10  4 17 12 16 15  9 16 19 14 11 17  7 23 13 13 16  3  5  9 10 16 27
## [3385] 15 13 31 13 10 15  6 23 17 23  4 20  4 12 14  9  8 30  5 15 13  2  7 16
## [3409]  7 14 13 29 13  1 21  8 12  3  8  9  6  7  2 12 17 16 14  5 18 11 16 24
## [3433] 11 21  7 16 16 14 37 11  1 17  6 15  3  1 15  6 10 18  5 12 23  6 51 20
## [3457]  5 14 12 11 16  8 21 34 10  5 27 14 17 19  3  2 10 14 11  9 18 12 16 22
## [3481] 18 18 12  8 13 13 13  4 19 16  6 12  7 51 11  9 12 16 17 16 11 21 15  4
## [3505] 16 25  5 27 18  9 19 10 10 23 20 22  1  5 17 11 14 24 19  6 14 11 12 14
## [3529]  6 19 17 14 14 18 13 30 29  8 17 19  4 10 23  8 17  9  3 12 23 20 12  8
## [3553]  7  1 15 13 10 13 21 20  6 13 13  3 12 27 11 14 19 27  4  2  9 13  9 12
## [3577] 13 18 12 13  1 13 31 14  3 20  3 12  6 19 12  8 13  2 12 28 16 10  3  2
## [3601] 14 24  5 18 21 19 22 20 18 26 28 10 17 24  5  4 18 13  4 12 31 20 18 17
## [3625] 15 10 15  1 18  3 10 24  8 16  2 16 12 13  4  6 26  6 11  7 11 13 26  6
## [3649] 17  8 18 21 28 28 11  4  8 17 24 17  8 13  2 16 18 13  3 10 15 13 13 30
## [3673] 12 17 26 16 10 30 23 12  1 22  9 13  8 28 21 17 20  2 13 18  8 11 18  6
## [3697] 14  3 18 42 14 18 21 19 10  7 11 17 38 24 12 13  9 20 11 15 12  6 12 16
## [3721] 20 11 16  8 11  4 12 10  6 11  7  2 19 21 15  7 16 19  2 11  2 22  3 15
## [3745] 15 17 16 17 12  6  2 14 25 25 16  9 17  6 21  2 23 26 13 11 19 14 12 11
## [3769] 15 25 11 47 19 10 14  9 11  2 16 19  9 16 15  7  3 13  6 15 18 19 12 28
## [3793] 22 18  2 19 20 16 23 16  4 20 16  1 18 21  2  1  8 20  9  7 31 21  3 15
## [3817] 28  8  9 11 22  8 26 16 25 22 10 13 28 18 26 13 12 11 11  5 16  1 19 21
## [3841] 27 18 18  3  6 23  2 20  2 22 14  6 18 11 22 17  7  7  3 11 16  4 12  1
## [3865] 25 31  7 19 21 14 18 12 28 11 14 16 15 17 13 17 17  5 20  2 16  7 12 16
## [3889] 21 14 16 18  8 40 23 17 15 25 32 11 10  9 13  8 17  3 13  5 23 14 19 24
## [3913] 13  6 18 29 14  7  4 24 22  9 10 36 22 20 10 29  7 12 14 23 20 24 27 16
## [3937] 24  4  4  3 14  8 31 10  1 16  1  9 25 13 60 10 10  9  5 13 12 37 24 28
## [3961] 28  4 18 14  1 23 25 14 14 10 15  6  7 12 16 13  9 18 31 21 22 13 20 14
## [3985] 19 22  6 12 23 12 39 13 23 27 19 17 21 15 11 12 13 16  5 21 19 14 18 10
## [4009] 19 20  6 24 20 10 22  4 29  2 11  1 14 11 11  7
# Correlation matrix plot of `cortable` with histograms enabled and
# solid-circle point symbols (pch = 19).
chart.Correlation(cortable, histogram = TRUE, pch = 19)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter

We have to choose one variable from each group of strongly correlated variables: differentiate vs. grade; t_stage vs. tumor_size; and n_stage vs. reginol_node_positive vs. x6th_stage_num.

# Drop `differentiate` and `x6th_stage_num` from the numeric data set
# before fitting.
muldata <- select(project_2_numdata, -c(differentiate, x6th_stage_num))

# Cox proportional hazards model using every remaining column of `muldata`
# as a predictor.
cox_model <- coxph(
  Surv(survival_months, status) ~ .,
  data = muldata
)

# Variance inflation factors for the terms of the fitted model.
vifs <- vif(cox_model)
## Warning in vif.default(cox_model): No intercept: vifs may not be sensible.
print(vifs)
##                             GVIF Df GVIF^(1/(2*Df))
## age                     1.108043  1        1.052636
## race                    1.083424  2        1.020234
## marital_status          1.227326  4        1.025935
## t_stage                 4.289067  1        2.071006
## n_stage                16.221966  1        4.027650
## x6th_stage             38.451047  4        1.578024
## grade                   1.138415  1        1.066965
## tumor_size              2.829424  1        1.682089
## estrogen_status         1.623845  1        1.274302
## progesterone_status     1.546881  1        1.243737
## regional_node_examined  1.726548  1        1.313982
## reginol_node_positive   4.397039  1        2.096912
## a_stage_regional        1.230259  1        1.109171

The VIF analysis reveals substantial multicollinearity for x6th_stage (GVIF = 38.45, `GVIF^(1/(2*Df))` = 1.58), n_stage (GVIF = 16.22, `GVIF^(1/(2*Df))` = 4.03), reginol_node_positive (GVIF = 4.40, `GVIF^(1/(2*Df))` = 2.10), and t_stage (GVIF = 4.29, `GVIF^(1/(2*Df))` = 2.07), indicating redundancy among the staging variables. tumor_size shows moderate multicollinearity (GVIF = 2.83, `GVIF^(1/(2*Df))` = 1.68), while the other variables have acceptable values near 1. Variables with high multicollinearity should be considered for exclusion, so I decided to remove t_stage, n_stage, and reginol_node_positive.

# Remove the stage/node columns chosen for exclusion after the first
# VIF check.
final <- select(muldata, -c(t_stage, n_stage, reginol_node_positive))

# Refit the Cox model on the reduced data set and recompute the VIFs.
cox_model <- coxph(
  Surv(survival_months, status) ~ .,
  data = final
)
vifs <- vif(cox_model)
## Warning in vif.default(cox_model): No intercept: vifs may not be sensible.
print(vifs)
##                            GVIF Df GVIF^(1/(2*Df))
## age                    1.108302  1        1.052759
## race                   1.060132  2        1.014705
## marital_status         1.149711  4        1.017592
## x6th_stage             1.930448  4        1.085694
## grade                  1.133430  1        1.064627
## tumor_size             1.328934  1        1.152794
## estrogen_status        1.585994  1        1.259363
## progesterone_status    1.528070  1        1.236151
## regional_node_examined 1.222324  1        1.105587
## a_stage_regional       1.197044  1        1.094095
library(survival)

# Test the proportional hazards assumption for the terms of `cox_model`
# and print the resulting table.
ph_test <- cox.zph(fit = cox_model)
print(ph_test)
##                         chisq df       p
## age                     0.144  1   0.704
## race                    1.296  2   0.523
## marital_status          2.191  4   0.701
## x6th_stage              4.280  4   0.369
## grade                   2.425  1   0.119
## tumor_size              1.216  1   0.270
## estrogen_status        30.810  1 2.8e-08
## progesterone_status    32.831  1 1.0e-08
## regional_node_examined  0.188  1   0.665
## a_stage_regional        4.845  1   0.028
## GLOBAL                 54.550 17 8.1e-06
# Plot the cox.zph diagnostics for the fitted terms.
plot(ph_test)

The Cox model assumes that hazard ratios are constant over time. A non-significant p-value (p > 0.05) indicates no evidence against the PH assumption. The global test (chisq = 54.550, df = 17, p = 8.1e-06) is significant, so the model as a whole does not meet the assumption; the individual tests for estrogen_status, progesterone_status, and a_stage_regional are also significant. We need to improve our model further.

# Reduced model fitted on the same data, leaving out estrogen_status,
# progesterone_status and a_stage_regional (the terms with significant
# cox.zph tests in the previous model).
finalmodel <- coxph(
  Surv(survival_months, status) ~
    age + race + marital_status + grade + tumor_size +
    regional_node_examined + x6th_stage,
  data = final
)

# Re-run the proportional hazards test on the reduced model.
ph_test <- cox.zph(fit = finalmodel)
print(ph_test)
##                          chisq df     p
## age                     0.0977  1 0.755
## race                    1.4006  2 0.496
## marital_status          2.6540  4 0.617
## grade                   3.0281  1 0.082
## tumor_size              1.6594  1 0.198
## regional_node_examined  0.2829  1 0.595
## x6th_stage              4.3089  4 0.366
## GLOBAL                 12.6117 14 0.557
# Diagnostic plots for the reduced model's cox.zph test.
plot(ph_test)

# Concordance statistics: the earlier model first, then the reduced model.
cox_model[["concordance"]]
##   concordant   discordant       tied.x       tied.y      tied.xy  concordance 
## 1.435780e+06 5.080060e+05 1.000000e+00 2.219000e+03 0.000000e+00 7.386511e-01 
##          std 
## 1.058639e-02
finalmodel[["concordance"]]
##   concordant   discordant       tied.x       tied.y      tied.xy  concordance 
## 1.391238e+06 5.525470e+05 2.000000e+00 2.219000e+03 0.000000e+00 7.157363e-01 
##          std 
## 1.074139e-02

How to read the `plot(ph_test)` panels. No clear trend: if the solid line remains flat (slope close to zero), the PH assumption is satisfied for that variable. Upward or downward trend: a visible trend or deviation indicates that the proportional hazards assumption may be violated for the corresponding variable, suggesting time-dependent effects.

A C-index of 0.716 suggests that the final model has good discriminatory power, meaning it correctly ranks the survival times for about 71.6% of the comparable pairs. The standard error is 0.0107, indicating a narrow range of variability in the concordance estimate and suggesting robust performance. The reduction in the C-index (from 73.9% to 71.6%) indicates a trade-off between model complexity and performance.

# Deviance residuals of the final model, plotted by observation index.
# They are taken from `finalmodel` (the model kept after the proportional
# hazards check) rather than from the superseded `cox_model`.
dev_residuals <- residuals(finalmodel, type = "deviance")
plot(
  dev_residuals,
  main = "Deviance Residuals",
  ylab = "Residuals",
  xlab = "Index"
)
# Dashed red reference lines at -2 and 2 to help spot large residuals.
abline(h = c(-2, 2), col = "red", lty = 2)

# Survival curve predicted by the final model. Without `newdata`,
# survfit() on a coxph fit gives the curve for a pseudo-cohort at the
# covariate means, so the plot now matches its "Final Model" title instead
# of showing an unadjusted Kaplan-Meier estimate (`~ 1`).
surv_fit <- survfit(finalmodel)

plot(
  surv_fit,
  xlab = "Time (months)",
  ylab = "Survival Probability",
  main = "Survival Curve for the Final Model",
  col = "blue",
  lwd = 2
)

grid()